{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert Kinetics Library to Training Reactions Script\n",
    "\n",
    "Specify the kinetics library name below and run the script.  It automatically overwrites the training reactions files it needs to.  Then you should commit those files.\n",
    "\n",
    "This script only trains safely.  In other words, if a single match from an RMG family is found, a training reaction is created.  Sometimes, there are no matches from RMG reaction families, or multiple matches.  This indicates an error that requires manual fixing, and a printout is given in the script."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set libraries to load reactions from; set to None to load all libraries\n",
    "libraries = ['vinylCPD_H']\n",
    "\n",
    "# Set families to add training reactions to; either 'all' or a list, e.g. ['R_Addition_MultipleBond']\n",
    "families = ['Intra_R_Add_Endocyclic']\n",
    "\n",
    "# Specify whether to plot kinetics comparisons\n",
    "compare_kinetics = True\n",
    "\n",
    "# Specify whether to print library reactions which don't fit in the specified families\n",
    "# This can result in a lot of unnecessary output if only using a few families\n",
    "show_all = False\n",
    "\n",
    "# Specify whether to prioritize aromatic resonance structures to reduce cases of multiple matches\n",
    "filter_aromatic = True\n",
    "\n",
    "# Specify whether to use verbose comments when averaging tree\n",
    "verbose_comments = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rmgpy import settings\n",
    "from rmgpy.data.rmg import RMGDatabase\n",
    "from kinetics_library_to_training_tools import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "## Step 1: Load RMG-database with specified libraries and families"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "database = RMGDatabase()\n",
    "database.load(\n",
    "    path = settings['database.directory'],\n",
    "    thermo_libraries = ['primaryThermoLibrary'],  # Can add others if necessary\n",
    "    kinetics_families = families,\n",
    "    reaction_libraries = libraries,\n",
    "    kinetics_depositories = ['training'],\n",
    ")\n",
    "# If we want accurate kinetics comparison, add existing training reactions and fill tree by averaging\n",
    "if compare_kinetics:\n",
    "    for family in database.kinetics.families.values():\n",
    "        family.add_rules_from_training(thermo_database=database.thermo)\n",
    "        family.fill_rules_by_averaging_up(verbose=verbose_comments)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2a: Generate library reactions from families to get proper labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "master_dict, multiple_dict = process_reactions(database,\n",
    "                                               libraries,\n",
    "                                               families,\n",
    "                                               compare_kinetics=compare_kinetics,\n",
    "                                               show_all=show_all,\n",
    "                                               filter_aromatic=filter_aromatic)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2b (optional): Review and select reactions to be added"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "review_reactions(master_dict, prompt=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2c (optional): Manual processing for reactions with multiple matches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "manual_selection(master_dict, multiple_dict, database)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2d: Final review of reactions to be added"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "review_reactions(master_dict, prompt=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Write the new training reactions to the database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for library_name, reaction_dict in master_dict.items():\n",
    "    library = database.kinetics.libraries[library_name]\n",
    "    \n",
    "    for family_name, reaction_list in reaction_dict.items():\n",
    "        print('Adding training reactions from {0} to {1}...'.format(library_name, family_name))\n",
    "\n",
    "        family = database.kinetics.families[family_name]\n",
    "        try:\n",
    "            depository = family.get_training_depository()\n",
    "        except:\n",
    "            raise Exception('Unable to find training depository in {0}. Check that one exists.'.format(family_name))\n",
    "\n",
    "        print('Training depository previously had {} rxns. Now adding {} new rxn(s).'.format(len(depository.entries), len(reaction_list)))\n",
    "\n",
    "        ref_list = []\n",
    "        type_list = []\n",
    "        short_list = []\n",
    "        long_list = []\n",
    "        \n",
    "        for reaction in reaction_list:\n",
    "            # Get the original entry to retrieve metadata\n",
    "            orig_entry = library.entries[reaction.index]\n",
    "            short_desc = orig_entry.short_desc\n",
    "            long_desc = 'Training reaction from kinetics library: {0}\\nOriginal entry: {1}'.format(library_name, orig_entry.label)\n",
    "            if orig_entry.long_desc:\n",
    "                long_desc += '\\n' + orig_entry.long_desc\n",
    "            \n",
    "            ref_list.append(orig_entry.reference)\n",
    "            type_list.append(orig_entry.reference_type)\n",
    "            short_list.append(short_desc)\n",
    "            long_list.append(long_desc)\n",
    "            \n",
    "        family.save_training_reactions(\n",
    "            reaction_list,\n",
    "            reference=ref_list,\n",
    "            reference_type=type_list,\n",
    "            short_desc=short_list,\n",
    "            long_desc=long_list,\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:rmg_env]",
   "language": "python",
   "name": "conda-env-rmg_env-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
